*! version 2.0
* 21 August 2013
* NIDS
* Merging individuals across waves 1 & 2

/*  

NOTE TO USER

This dataset contains only individuals who are represented in both Wave 1 and Wave 2 of NIDS. TSMs from both waves are therefore 
excluded, as are new Wave 2 CSMs who were introduced to the NIDS sample because a Wave 1 CSM gave birth. Individuals who are 
deceased in Wave 2 are also excluded.

The resulting dataset is a balanced panel of respondents who were alive in both W1 and W2. Their individual questionnaire 
outcomes have not been controlled for.

*/

*===========================================================================================================================================
* USER SETTINGS: STATA VERSION, DATA LOCATIONS AND FILE-NAME SUFFIXES

* Run under Stata 12 rules; the wildcard form of -rename- used below needs it.
version 12.0

* Wave 1 input: folder holding the files, and the suffix that completes each
* file name (Adult_<suffix>.dta, Child_<suffix>.dta, Proxy_<suffix>.dta).
global W1Data "\\137.158.104.21\data\Panel Public Release 2014a\Wave 1\Anon"
global W1VerIN "W1_Anon_V5.2"

* Wave 2 input: same layout as Wave 1; the link file is also read from here.
global W2Data "\\137.158.104.21\data\Panel Public Release 2014a\Wave 2\Anon"
global W2VerIN "W2_Anon_V2.2"

* Output: folder for the merged dataset, and the suffix appended to its name.
global DataOUT "C:\Users\01406074\Desktop"
global VersionOUT "merged"

* Working folder (not a Stata tempfile) for the intermediate datasets.
* Those intermediate files are erased again at the end of this do-file.
global temp "C:\Users\01406074\Desktop"

*===========================================================================================================================================

* BUILDING THE WAVE 2 INDIVIDUAL-LEVEL FILE (CHILD + ADULT + PROXY)
*
* Each questionnaire file is opened, the questionnaire letter is removed from
* the variable prefix (w2_a_x, w2_p_x and w2_c_x all become w2_x) so that the
* three files share variable names, and w2_dataset records the source file.
*
* The rename patterns are anchored on the underscore that follows the letter,
* so only the complete w#_a_ (or w#_p_, w#_c_) prefix is stripped. A looser
* pattern without that underscore would also rewrite any variable that merely
* begins with the letter (a name such as w2_cluster would become w2luster).

* ADULT: rename, tag and park in the working folder

use "$W2Data\Adult_$W2VerIN.dta", clear
rename w#_a_* w#_*
generate w2_dataset = "Adult"
save "$temp\adult2.dta", replace

* PROXY: rename, tag and park in the working folder

use "$W2Data\Proxy_$W2VerIN.dta", clear
rename w#_p_* w#_*
generate w2_dataset = "Proxy"
save "$temp\proxy2.dta", replace

* CHILD: rename and tag, then stack the adult and proxy files underneath

use "$W2Data\Child_$W2VerIN.dta", clear
rename w#_c_* w#_*
generate w2_dataset = "Child"

append using "$temp\adult2.dta"
append using "$temp\proxy2.dta"

* The questionnaire-level outcome variable is removed from the stacked file.
* NOTE(review): presumably because w2_ind_outcome from the link file is used
* in the final dataset instead - confirm.
drop w2_outcome

* Stacked Wave 2 file, merged onto the Wave 1 records later in this do-file
save "$temp\w2_indi.dta", replace

*-------------------------------------------------------------------------------------------------------------------------------------------

* BUILDING THE WAVE 1 INDIVIDUAL-LEVEL FILE (CHILD + ADULT + PROXY)
*
* Each questionnaire file is opened, the questionnaire letter is removed from
* the variable prefix (w1_a_x, w1_p_x and w1_c_x all become w1_x) so that the
* three files share variable names, and w1_dataset records the source file.
*
* The rename patterns are anchored on the underscore that follows the letter,
* so only the complete w#_a_ (or w#_p_, w#_c_) prefix is stripped. A looser
* pattern without that underscore would also rewrite any variable that merely
* begins with the letter (a name such as w1_cluster would become w1luster).

* ADULT: rename, tag and park in the working folder

use "$W1Data\Adult_$W1VerIN.dta", clear
rename w#_a_* w#_*
generate w1_dataset = "Adult"
save "$temp\adult1.dta", replace

* PROXY: rename, tag and park in the working folder

use "$W1Data\Proxy_$W1VerIN.dta", clear
rename w#_p_* w#_*
generate w1_dataset = "Proxy"
save "$temp\proxy1.dta", replace

* CHILD: rename and tag, then stack the adult and proxy files underneath.
* The stacked Wave 1 file is not saved; it stays in memory for the next step.

use "$W1Data\Child_$W1VerIN.dta", clear
rename w#_c_* w#_*
generate w1_dataset = "Child"

append using "$temp\adult1.dta"
append using "$temp\proxy1.dta"

*-------------------------------------------------------------------------------------------------------------------------------------------

* LINKING WAVE 1 RECORDS TO WAVE 2 VIA THE LINK FILE, THEN SAVING THE RESULT

* Step 1: attach the link file to the Wave 1 records in memory. Only the
* Wave 2 household id and the two individual outcome variables are brought in
* (plus the merge keys); the match result is stored in w1_link.
merge 1:1 pid w1_hhid using "$W2Data\Link_File_$W2VerIN.dta", ///
    keepusing(pid w1_hhid w2_hhid w1_ind_outcome w2_ind_outcome) ///
    generate(w1_link)

* Keep only records found in both the Wave 1 data and the link file.
keep if w1_link == 3

* Step 2: attach the stacked Wave 2 individual file, matching on person id and
* Wave 2 household id; the match result is stored in w2_link.
merge 1:1 pid w2_hhid using "$temp\w2_indi.dta", generate(w2_link)

* Keep matched records only. This removes:
*   w2_link == 1  Wave 1 individuals with no Wave 2 individual record
*                 (described by the authors as CSMs who died between waves;
*                 NOTE(review): confirm no other cases fall in this group)
*   w2_link == 2  Wave 2 records with no Wave 1 counterpart, i.e. new Wave 2
*                 CSMs (babies born to CSM mothers) and TSMs
keep if w2_link == 3

* Put the identifiers, source markers and outcomes first, sort by person and
* Wave 1 household, and remove the two merge indicators.
order pid w1_hhid w2_hhid w1_dataset w2_dataset w1_ind_outcome w2_ind_outcome
sort pid w1_hhid
drop w*_link

* Write the merged two-wave dataset to the output folder.
save "$DataOUT\W1_W2_Indi_$VersionOUT.dta", replace

*-------------------------------------------------------------------------------------------------------------------------------------------

* CLEAN-UP: REMOVING THE INTERMEDIATE DATASETS FROM THE WORKING FOLDER

* Wave 2 working files
erase "$temp\adult2.dta"
erase "$temp\proxy2.dta"
erase "$temp\w2_indi.dta"

* Wave 1 working files
erase "$temp\adult1.dta"
erase "$temp\proxy1.dta"

* end of do file

*===========================================================================================================================================

